import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
from pathlib import Path
from lets_plot import *
LetsPlot.setup_html(no_js=True)In [3]:
In [4]:
plt.style.use(
"https://raw.githubusercontent.com/aeturrell/core_python/main/plot_style.txt"
)In [5]:
# Load the experiment workbook. Only spreadsheet columns A to Q are read, the
# second row of the sheet (header=1 is zero-based) supplies the column names,
# and the "Period" column becomes the row index.
data_np = pd.read_excel(
    io="data/doing-economics-datafile-working-in-excel-project-2.xlsx",
    header=1,
    index_col="Period",
    usecols="A:Q",
)
# The first ten rows are the series plotted below as "Without punishment".
# .copy() gives data_n its own data so later changes do not touch data_np.
data_n = data_np.iloc[:10].copy()
data_p = data_np.iloc[14:24, :].copy()c:\Users\HP\anaconda3\Lib\site-packages\openpyxl\worksheet\_read_only.py:85: UserWarning: Unknown extension is not supported and will be removed
for idx, row in parser.parse():
In [6]:
# Small example used to contrast a real copy of a dataframe with a second
# name that refers to the very same dataframe.
test_data = {
    "City A": [14.1, 14.1, 13.7],
    "City B": [11.0, 12.6, 12.1],
}
test_df = pd.DataFrame(test_data)  # the original dataframe
test_copy = test_df.copy()  # an independent copy of the dataframe
test_pointer = test_df  # not a copy: just another name for test_df
test_pointer.iloc[1, 1] = 99In [7]:
print("test_df=")
print(f"{test_df}\n")
print("test_copy=")
print(f"{test_copy}\n")test_df=
City A City B
0 14.1 11.0
1 14.1 99.0
2 13.7 12.1
test_copy=
City A City B
0 14.1 11.0
1 14.1 12.6
2 13.7 12.1
In [8]:
data_n.info()<class 'pandas.core.frame.DataFrame'>
Index: 10 entries, 1 to 10
Data columns (total 16 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 Copenhagen 10 non-null object
1 Dnipropetrovs’k 10 non-null object
2 Minsk 10 non-null object
3 St. Gallen 10 non-null object
4 Muscat 10 non-null object
5 Samara 10 non-null object
6 Zurich 10 non-null object
7 Boston 10 non-null object
8 Bonn 10 non-null object
9 Chengdu 10 non-null object
10 Seoul 10 non-null object
11 Riyadh 10 non-null object
12 Nottingham 10 non-null object
13 Athens 10 non-null object
14 Istanbul 10 non-null object
15 Melbourne 10 non-null object
dtypes: object(16)
memory usage: 1.3+ KB
In [9]:
data_n = data_n.astype("double")
data_p = data_p.astype("double")In [10]:
# Average contribution across the cities (columns) for each period (row).
# Two equivalent spellings are shown on purpose: the dedicated .mean() method
# and the general-purpose .agg() method.
# .agg() is given the string "mean" rather than the np.mean callable: pandas
# emits a FutureWarning for the callable form and will change how it is
# applied in a future version, whereas the string keeps the current behaviour.
mean_n_c = data_n.mean(axis=1)
mean_p_c = data_p.agg("mean", axis=1)
In [11]:
# Line chart of the per-period average contribution, one line per treatment.
fig, ax = plt.subplots()
for series, label in (
    (mean_n_c, "Without punishment"),
    (mean_p_c, "With punishment"),
):
    series.plot(ax=ax, label=label)
ax.set(
    title="Average contributions to the public goods game",
    ylabel="Average contribution",
)
# Trailing semicolon stops the notebook from echoing the Legend object.
ax.legend();
In [12]:
partial_names_list = ["F. Kennedy", "Lennon", "Maynard Keynes", "Wayne"]
["John " + name for name in partial_names_list]['John F. Kennedy', 'John Lennon', 'John Maynard Keynes', 'John Wayne']
In [13]:
# Build the table behind the bar chart in one chain:
#   1. stack the period-1 and period-10 averages of each treatment as two rows,
#   2. relabel the period columns as "Round 1" / "Round 10",
#   3. transpose (.T) so rounds become the x-axis groups and treatments the bars.
compare_grps = (
    pd.DataFrame(
        [mean_n_c.loc[[1, 10]], mean_p_c.loc[[1, 10]]],
        index=["Without punishment", "With punishment"],
    )
    .rename(columns=lambda period: "Round " + str(period))
    .T
)
# Grouped bar chart; rot=0 keeps the x tick labels horizontal.
compare_grps.plot.bar(rot=0);
In [14]:
n_c = data_n.agg(["std", "var", "mean"], 1)
n_c| std | var | mean | |
|---|---|---|---|
| Period | |||
| 1 | 2.020724 | 4.083325 | 10.578313 |
| 2 | 2.238129 | 5.009220 | 10.628398 |
| 3 | 2.329569 | 5.426891 | 10.407079 |
| 4 | 2.068213 | 4.277504 | 9.813033 |
| 5 | 2.108329 | 4.445049 | 9.305433 |
| 6 | 2.240881 | 5.021549 | 8.454844 |
| 7 | 2.136614 | 4.565117 | 7.837568 |
| 8 | 2.349442 | 5.519880 | 7.376388 |
| 9 | 2.413845 | 5.826645 | 6.392985 |
| 10 | 2.187126 | 4.783520 | 4.383769 |
In [15]:
p_c = data_p.agg(["std", "var", "mean"], 1)In [16]:
# Without punishment: mean line, a band at mean ± 2 standard deviations, and
# every city's value per period drawn as a faint black dot.
fig, ax = plt.subplots()
two_sd = 2 * n_c["std"]
n_c["mean"].plot(ax=ax, label="mean")
# Upper edge of the band carries the legend entry ...
(n_c["mean"] + two_sd).plot(ax=ax, ylim=(0, None), color="red", label="±2 s.d.")
# ... the lower edge gets an empty label.
(n_c["mean"] - two_sd).plot(ax=ax, ylim=(0, None), color="red", label="")
# One scatter call per city column, in column order.
for city, contributions in data_n.items():
    ax.scatter(x=data_n.index, y=contributions, color="k", alpha=0.3)
ax.legend()
ax.set_ylabel("Average contribution")
ax.set_title("Contribution to public goods game without punishment")
plt.show()
In [17]:
# With punishment: mean line, a band at mean ± 2 standard deviations, and
# every city's value per period drawn as a faint black dot.
fig, ax = plt.subplots()
two_sd = 2 * p_c["std"]
p_c["mean"].plot(ax=ax, label="mean")
# Upper edge of the band carries the legend entry ...
(p_c["mean"] + two_sd).plot(ax=ax, ylim=(0, None), color="red", label="±2 s.d.")
# ... the lower edge gets an empty label.
(p_c["mean"] - two_sd).plot(ax=ax, ylim=(0, None), color="red", label="")
# One scatter call per city column, in column order.
for city, contributions in data_p.items():
    ax.scatter(x=data_p.index, y=contributions, color="k", alpha=0.3)
ax.legend()
ax.set_ylabel("Average contribution")
ax.set_title("Contribution to public goods game with punishment")
plt.show()
In [18]:
data_p.apply(lambda x: x.max() - x.min(), axis=1)Period
1 10.199675
2 12.185065
3 12.689935
4 12.625000
5 12.140375
6 12.827541
7 13.098931
8 13.482621
9 13.496754
10 11.307360
dtype: float64
In [19]:
# A lambda function accepting three inputs, a, b, and c, and calculating the sum of the squares
test_function = lambda a, b, c: a**2 + b**2 + c**2
# Now we apply the function by handing over (in parentheses) the following inputs: a=3, b=4 and c=5
test_function(3, 4, 5)50
In [20]:
# Range of a set of values: largest value minus smallest value.
# NOTE: this is intentionally left as a lambda. A later cell passes it to
# .apply() in a list and then renames the resulting "<lambda>" column to
# "range"; a named function would produce a different column name and the
# rename would no longer match.
range_function = lambda x: x.max() - x.min()
# axis=1 applies the function to each row, i.e. across the columns for every period.
range_p = data_p.apply(range_function, axis=1)
range_n = data_n.apply(range_function, axis=1)In [21]:
# Line chart of the per-period range of contributions, one line per treatment.
fig, ax = plt.subplots()
for series, label in (
    (range_p, "With punishment"),
    (range_n, "Without punishment"),
):
    series.plot(ax=ax, label=label)
# Pin the y-axis to start at zero; the upper limit stays automatic.
ax.set_ylim(bottom=0)
ax.legend()
ax.set_title("Range of contributions to the public goods game")
plt.show()
In [22]:
funcs_to_apply = [range_function, "max", "min", "std", "mean"]
summ_p = data_p.apply(funcs_to_apply, axis=1).rename(columns={"<lambda>": "range"})
summ_n = data_n.apply(funcs_to_apply, axis=1).rename(columns={"<lambda>": "range"})In [23]:
summ_n.loc[[1, 10], :].round(2)| range | max | min | std | mean | |
|---|---|---|---|---|---|
| Period | |||||
| 1 | 6.14 | 14.10 | 7.96 | 2.02 | 10.58 |
| 10 | 7.38 | 8.68 | 1.30 | 2.19 | 4.38 |
In [24]:
summ_p.loc[[1, 10], :].round(2)| range | max | min | std | mean | |
|---|---|---|---|---|---|
| Period | |||||
| 1 | 10.20 | 16.02 | 5.82 | 3.21 | 10.64 |
| 10 | 11.31 | 17.51 | 6.20 | 3.90 | 12.87 |
In [27]:
%pip install pingouinCollecting pingouin
Downloading pingouin-0.5.5-py3-none-any.whl.metadata (19 kB)
Requirement already satisfied: matplotlib in c:\users\hp\anaconda3\lib\site-packages (from pingouin) (3.9.2)
Requirement already satisfied: numpy in c:\users\hp\anaconda3\lib\site-packages (from pingouin) (1.26.4)
Requirement already satisfied: pandas>=1.5 in c:\users\hp\anaconda3\lib\site-packages (from pingouin) (2.2.2)
Collecting pandas-flavor (from pingouin)
Downloading pandas_flavor-0.6.0-py3-none-any.whl.metadata (6.3 kB)
Requirement already satisfied: scikit-learn>=1.2 in c:\users\hp\anaconda3\lib\site-packages (from pingouin) (1.5.1)
Requirement already satisfied: scipy in c:\users\hp\anaconda3\lib\site-packages (from pingouin) (1.13.1)
Requirement already satisfied: seaborn in c:\users\hp\anaconda3\lib\site-packages (from pingouin) (0.13.2)
Requirement already satisfied: statsmodels in c:\users\hp\anaconda3\lib\site-packages (from pingouin) (0.14.2)
Requirement already satisfied: tabulate in c:\users\hp\anaconda3\lib\site-packages (from pingouin) (0.9.0)
Requirement already satisfied: python-dateutil>=2.8.2 in c:\users\hp\anaconda3\lib\site-packages (from pandas>=1.5->pingouin) (2.9.0.post0)
Requirement already satisfied: pytz>=2020.1 in c:\users\hp\anaconda3\lib\site-packages (from pandas>=1.5->pingouin) (2024.1)
Requirement already satisfied: tzdata>=2022.7 in c:\users\hp\anaconda3\lib\site-packages (from pandas>=1.5->pingouin) (2023.3)
Requirement already satisfied: joblib>=1.2.0 in c:\users\hp\anaconda3\lib\site-packages (from scikit-learn>=1.2->pingouin) (1.4.2)
Requirement already satisfied: threadpoolctl>=3.1.0 in c:\users\hp\anaconda3\lib\site-packages (from scikit-learn>=1.2->pingouin) (3.5.0)
Requirement already satisfied: contourpy>=1.0.1 in c:\users\hp\anaconda3\lib\site-packages (from matplotlib->pingouin) (1.2.0)
Requirement already satisfied: cycler>=0.10 in c:\users\hp\anaconda3\lib\site-packages (from matplotlib->pingouin) (0.11.0)
Requirement already satisfied: fonttools>=4.22.0 in c:\users\hp\anaconda3\lib\site-packages (from matplotlib->pingouin) (4.51.0)
Requirement already satisfied: kiwisolver>=1.3.1 in c:\users\hp\anaconda3\lib\site-packages (from matplotlib->pingouin) (1.4.4)
Requirement already satisfied: packaging>=20.0 in c:\users\hp\anaconda3\lib\site-packages (from matplotlib->pingouin) (24.1)
Requirement already satisfied: pillow>=8 in c:\users\hp\anaconda3\lib\site-packages (from matplotlib->pingouin) (10.4.0)
Requirement already satisfied: pyparsing>=2.3.1 in c:\users\hp\anaconda3\lib\site-packages (from matplotlib->pingouin) (3.1.2)
Requirement already satisfied: xarray in c:\users\hp\anaconda3\lib\site-packages (from pandas-flavor->pingouin) (2023.6.0)
Requirement already satisfied: patsy>=0.5.6 in c:\users\hp\anaconda3\lib\site-packages (from statsmodels->pingouin) (0.5.6)
Requirement already satisfied: six in c:\users\hp\anaconda3\lib\site-packages (from patsy>=0.5.6->statsmodels->pingouin) (1.16.0)
Downloading pingouin-0.5.5-py3-none-any.whl (204 kB)
Downloading pandas_flavor-0.6.0-py3-none-any.whl (7.2 kB)
Installing collected packages: pandas-flavor, pingouin
Successfully installed pandas-flavor-0.6.0 pingouin-0.5.5
Note: you may need to restart the kernel to use updated packages.
In [28]:
import pingouin as pgIn [29]:
pg.ttest(x=data_n.iloc[0, :], y=data_p.iloc[0, :])| T | dof | alternative | p-val | CI95% | cohen-d | BF10 | power | |
|---|---|---|---|---|---|---|---|---|
| T-test | -0.063782 | 30 | two-sided | 0.949567 | [-2.0, 1.87] | 0.02255 | 0.337 | 0.050437 |
In [30]:
pg.ttest(x=data_n.iloc[0, :], y=data_p.iloc[0, :], paired=True)| T | dof | alternative | p-val | CI95% | cohen-d | BF10 | power | |
|---|---|---|---|---|---|---|---|---|
| T-test | -0.149959 | 15 | two-sided | 0.882795 | [-0.92, 0.8] | 0.02255 | 0.258 | 0.05082 |